#### "How do past repression and indoctrination affect redistributive preferences?"  ####
# authors: "Pelke, Lars"
# date: 2019-10-23
# written under "R version 3.6.0 (2019-03-11)"

#### Libraries ####

library(countrycode)
library(tidyverse)
library(viridis)
library(readstata13)
library(lme4)
library(performance)
library(sjPlot)
require(Amelia)


#### Preliminaries ####

# Show the version string of the running R installation
R.version[["version.string"]]

# Empty the global environment before the analysis starts.
# NOTE(review): this removes objects only; attached packages and options stay
# as they are, so a fresh R session is the more reliable way to start clean.
rm(list = ls())

# set working directory: it must contain the data/ folder, because all file paths below are relative

#### Load data ####

# Read the merged master data set from the data/ folder
merged_data <- readRDS(file = "data/merged_data_master.rds")


#### Drop Data #####

# Distribution (including the number of NAs) of the variables that the
# complete-case filter further down is based on
summary(merged_data[["v2exl_legitideol_5"]])
summary(merged_data[["v2x_clphy_5"]])
summary(merged_data[["s_far_Maddison_gdppc_1990_estim_5"]])
summary(merged_data[["s_far_Maddison_pop_estimate_5"]])
summary(merged_data[["sex"]])
summary(merged_data[["education_3"]])
summary(merged_data[["unemployed"]])


# Mean that ignores missing values but stays missing when nothing is observed.
#
# v: vector to average.
# Returns NA_real_ when every element of `v` is NA (this includes an empty
# `v`), otherwise mean(v, na.rm = TRUE). A plain mean(v, na.rm = TRUE) would
# return NaN for an all-NA input.
mean.new <- function(v) {
  if (all(is.na(v))) {
    # typed NA: the result is a double in every case, so per-group results
    # always combine into one numeric column
    return(NA_real_)
  }
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned
  mean(v, na.rm = TRUE)
}

# Per data set / wave / year / country: group means of the two macro-level
# variables. mean.new() returns NA for a group without any observed value, so
# NA rows mark groups in which the variable is entirely missing.
# summarise() drops only the last grouping level; the result stays grouped.
overview_na_macro_data <- summarise(
  group_by(merged_data, data, wave, year, iso3n),
  mean_e_migdppcln = mean.new(e_migdppcln),
  mean_v2x_polyarchy = mean.new(v2x_polyarchy)
)

# Same overview for the individual-level variables, keyed by country name
overview_na_micro_data <- summarise(
  group_by(merged_data, data, wave, year, country_name),
  mean_sex = mean.new(sex),
  mean_education_3 = mean.new(education_3),
  mean_unemployed = mean.new(unemployed)
)

# Complete cases only: a respondent is kept when none of the listed
# variables is missing
merged_data_full <- drop_na(
  merged_data,
  v2x_polyarchy,
  autocracy,
  e_migdppcln,
  s_far_Maddison_pop_estimate_5,
  s_far_Maddison_gdppc_1990_estim_5,
  v2x_clphy_5,
  v2exl_legitideol_5,
  autocracy_5,
  sex,
  education_3,
  unemployed
)


#### Construct Multidimensional Redistributive Demand Index ####

# Sum that ignores missing values but stays missing when nothing is observed.
#
# v: vector to add up.
# Returns NA when every element of `v` is NA (this includes an empty `v`),
# otherwise sum(v, na.rm = TRUE). A plain sum(v, na.rm = TRUE) would return 0
# for an all-NA input.
sum.new <- function(v) {
  if (all(is.na(v))) {
    return(NA)
  }
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned
  sum(v, na.rm = TRUE)
}

# red_pref = (government_resp + income_equality) / 2, where a missing item
# counts as 0: a respondent who answered only one item gets half of that
# item's value. Respondents who answered neither item get NA.
#
# The index is computed column-wise with rowSums(). The values are the same as
# with rowwise() + sum(), but sum() is no longer evaluated once per respondent
# and the data frame is not left in row-wise mode for the following steps.
merged_data_full <- merged_data_full %>%
  ungroup() %>%
  mutate(
    red_pref = rowSums(cbind(government_resp, income_equality),
                       na.rm = TRUE) / 2,
    red_pref = ifelse(is.na(government_resp) & is.na(income_equality),
                      NA, red_pref)
  )

# number of respondents without an index value, and the index distribution
sum(is.na(merged_data_full$red_pref))
summary(merged_data_full$red_pref)

# respondents without an index value are removed
merged_data_full <- merged_data_full %>%
  drop_na(red_pref)

#### Delete cohorts with fewer than 1000 persons ####

# Number of respondents in every birth cohort (printed for inspection)
count(group_by(merged_data_full, cohort_5.x))

# Respondents whose birth cohort lies before 1910 are removed
merged_data_full <- filter(merged_data_full, cohort_5.x >= 1910)


#### Extract countries whose observations span >= 10 years ####

# num_years is, per country, the distance between the largest and the smallest
# value of `year`; countries with a distance below 10 are removed.
merged_data_full2 <- group_by(merged_data_full, country_name)
merged_data_full2 <- mutate(
  merged_data_full2,
  num_years = max(year) - min(year)
)
merged_data_full2 <- filter(merged_data_full2, num_years >= 10)
merged_data_full2 <- ungroup(merged_data_full2)

# Frequency of each remaining span, counted over respondents
table(merged_data_full2[["num_years"]])

#### Extract countries with at least three surveys #####

# One row per country-year combination, reduced to the country name, so that
# a country appears once for every year in which it was surveyed
num_surveys <- merged_data_full2 %>%
  ungroup() %>%
  count(country_name, year) %>%
  select(country_name)

# Number of country-years per country. The table() argument is named so that
# the country column of `counts` is always called `country_name`; tabulating
# the bare one-column data frame leaves that name to table()'s handling of
# data-frame input.
counts <- as.data.frame(table(country_name = num_surveys$country_name))

# keep countries that were surveyed in at least three different years
counts <- counts %>%
  filter(Freq >= 3)

# rebuild the factor so that removed countries do not survive as levels
counts$country_name <- factor(counts$country_name)

country_list <- counts$country_name

merged_data_full2 <- merged_data_full2 %>%
  ungroup() %>%
  dplyr::filter(country_name %in% country_list)

## drop observations from cohort-country cells with fewer than 10 individuals ##

# Respondents per cohort x country cell. Only cells with at least 10
# respondents are kept, and each cell gets the key "<country>_<cohort>".
# (The spelling of indidivuals_per_country is left unchanged.)
count_cohort_countries <- merged_data_full2 %>%
  group_by(cohortmatch5_15, country_name) %>%
  count() %>%
  rename(indidivuals_per_country = n) %>%
  dplyr::filter(indidivuals_per_country >= 10) %>%
  mutate(country_cohort = str_c(country_name, cohortmatch5_15, sep = "_"))

# keys of the cohort-country cells with enough respondents
country_cohort_list <- count_cohort_countries[["country_cohort"]]

# Attach the same key to every respondent in merged_data_full2 ...
merged_data_full2 <- mutate(
  ungroup(merged_data_full2),
  country_cohort = str_c(country_name, cohortmatch5_15, sep = "_")
)

class(merged_data_full2[["country_cohort"]])

# ... and keep the respondents whose key is on the list
merged_data_full2 <- dplyr::filter(
  ungroup(merged_data_full2),
  country_cohort %in% country_cohort_list
)

#### Delete Germany, problems with merging socialization variables ####

# Germany is excluded (see the section header: merging problems)
merged_data_full2 <- dplyr::filter(
  ungroup(merged_data_full2),
  country_name != "Germany"
)


# Survey overview: one row per country-year with the largest wave value, the
# number of respondents and the largest v2x_regime code of that country-year,
# the code being replaced by its label (0-3; any other value becomes NA).
num_waves <- merged_data_full2 %>%
  group_by(country_name, year) %>%
  summarise(wave = max(wave),
            number_i = n(),
            v2x_regime = max(v2x_regime)) %>%
  mutate(v2x_regime = case_when(v2x_regime == 0 ~ "Closed Autocracy",
                                v2x_regime == 1 ~ "Electoral Autocracy",
                                v2x_regime == 2 ~ "Electoral Democracy",
                                v2x_regime == 3 ~ "Liberal Democracy"))

library(stargazer)
# num_waves is still a grouped tibble at this point. stargazer() gets a plain
# data.frame copy because it is reported not to render tibbles reliably; the
# printed content is the same.
stargazer(as.data.frame(num_waves), summary = FALSE)

# Number of respondents per country
num_countries <- merged_data_full2 %>%
  group_by(country_name) %>%
  count()

stargazer(as.data.frame(num_countries), summary = FALSE)


# Only one cohort variable is kept: cohort_5.y is removed and cohort_5.x is
# renamed to cohort_5 (the .x/.y suffixes presumably stem from an earlier
# merge - TODO confirm)
merged_data_full2 <- select(merged_data_full2, -cohort_5.y)
merged_data_full2 <- rename(merged_data_full2, cohort_5 = cohort_5.x)

# Export the non-imputed sample as a Stata file
library(foreign)
write.dta(
  dataframe = merged_data_full2,
  file = "data/robustness_data_without_imputation.dta",
  version = 12,
  convert.dates = TRUE,
  tz = "GMT"
)



